Price to Earnings Anlaysis
library(rvest)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Read in data
# PE Ratio
PE_URL <- read_html('https://www.multpl.com/s-p-500-pe-ratio/table/by-month')
allTables <- PE_URL %>% html_table(fill = TRUE, header = TRUE)
P_E_Ratio <- allTables[[1]]
colnames(P_E_Ratio)[2] <- 'P/E Ratio'
# Cleaning Text and Date in columns
P_E_Ratio$`P/E Ratio` <- gsub(pattern = '\n\nestimate', replacement = '', x = P_E_Ratio$`P/E Ratio`)
class(P_E_Ratio$Date)
## [1] "character"
P_E_Ratio$Date <- as.Date(P_E_Ratio$Date, format = '%B %d, %Y')
P_E_Ratio <- P_E_Ratio[nrow(P_E_Ratio):1,]
P_E_Ratio$`P/E Ratio` <- as.numeric(P_E_Ratio$`P/E Ratio`)
str(P_E_Ratio)
## tibble [1,814 × 2] (S3: tbl_df/tbl/data.frame)
## $ Date : Date[1:1814], format: "1871-01-01" "1871-02-01" ...
## $ P/E Ratio: num [1:1814] 11.1 11.2 11.5 11.8 12.2 ...
head(P_E_Ratio)
## # A tibble: 6 × 2
## Date `P/E Ratio`
## <date> <dbl>
## 1 1871-01-01 11.1
## 2 1871-02-01 11.2
## 3 1871-03-01 11.5
## 4 1871-04-01 11.8
## 5 1871-05-01 12.2
## 6 1871-06-01 12.0
summary(P_E_Ratio)
## Date P/E Ratio
## Min. :1871-01-01 Min. : 5.31
## 1st Qu.:1908-10-08 1st Qu.: 11.49
## Median :1946-07-16 Median : 14.88
## Mean :1946-07-17 Mean : 15.97
## 3rd Qu.:1984-04-23 3rd Qu.: 18.23
## Max. :2022-02-11 Max. :123.73
P_E.ts <- ts(P_E_Ratio$`P/E Ratio`, start = c(1871, 1), end = c(2022, 2), freq = 12)
P_E.window.ts <- window(P_E.ts, start = c(2000, 1), end = c(2022, 2))
# Acf Plot
Acf(P_E.window.ts, lag.max = 12, main = '', na.action = na.pass)

# PE Plot
plot(P_E.window.ts, ylim = c(0, 150), ylab = "PE Ratio", xlab = "Time", bty = "l",
xaxt = "n", xlim = c(2000,2029), main = "")
axis(1, at = seq(2000, 2027.5, 1), labels = format(seq(2000, 2027.5, 1)))
lines(c(2022.17 - 1, 2022.17 - 1), c(0, 130))
lines(c(2022.17, 2022.17), c(0, 130))
text(2009, 140, "Training")
text(2021.5, 140, "Validation")
text(2026, 140, "Future")
arrows(2000, 130, 2022.17 - 2, 130, code = 3, length = 0.1, lwd = 1,angle = 30)
arrows(2022.17 - 1, 130, 2022.17, 130, code = 3, length = 0.1, lwd = 1,angle = 30)
arrows(2023.17, 130, 2028, 130, code = 3, length = 0.1, lwd = 1, angle = 30)

# Test for random walk
fit <- Arima(P_E.window.ts, order=c(1,0,0))
fit
## Series: P_E.window.ts
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.9628 26.1142
## s.e. 0.0152 6.2424
##
## sigma^2 estimated as 17.25: log likelihood=-756.51
## AIC=1519.03 AICc=1519.12 BIC=1529.78
fit2 <- Arima(diff(P_E_Ratio$`P/E Ratio`, 1), order=c(1,0,0))
fit2
## Series: diff(P_E_Ratio$`P/E Ratio`, 1)
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.5631 0.0076
## s.e. 0.0194 0.0762
##
## sigma^2 estimated as 2.017: log likelihood=-3207.72
## AIC=6421.45 AICc=6421.46 BIC=6437.95
fit3 <- auto.arima(P_E.window.ts)
fit3
## Series: P_E.window.ts
## ARIMA(5,0,2)(0,0,1)[12] with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 sma1
## -0.0200 0.9532 0.6600 -0.4976 -0.3494 1.4863 0.7518 0.2069
## s.e. 0.1308 0.1054 0.0972 0.0635 0.0744 0.1284 0.1146 0.0771
## mean
## 25.9375
## s.e. 2.7020
##
## sigma^2 estimated as 8.696: log likelihood=-662.95
## AIC=1345.89 AICc=1346.76 BIC=1381.73
Acf(diff(P_E_Ratio$`P/E Ratio`), lag.max = 12, main = '')

# Moving average
ma.trailing <- rollmean(P_E.window.ts, k = 12, align = 'right')
ma.centered <- ma(P_E.window.ts, order = 12)
plot(P_E.window.ts, ylim = c(0, 130), ylab = 'PE Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2000,2022.5), main = '')
axis(1, at = seq(2000, 2022.5, 1), labels = format(seq(2000, 2022.5, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,120, c('PE Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')

# Data partition
nValid <- 12
nTrain <- length(P_E.window.ts) - nValid
train.ts <- window(P_E.window.ts, start = c(2000, 1), end = c(2000, nTrain))
valid.ts <- window(P_E.window.ts, start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid))
# Naive model
naive.pred <- naive(train.ts, h = nValid)
# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)
# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 12, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid), freq = 12)
# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)
# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(8,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)
# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)
# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts
## ARIMA(5,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 mean
## 1.4708 -0.4747 0.3458 -0.6963 0.2942 25.9195
## s.e. 0.0596 0.1011 0.1031 0.1006 0.0595 3.0566
##
## sigma^2 estimated as 9.078: log likelihood=-639.79
## AIC=1293.58 AICc=1294.04 BIC=1318.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.005109035 2.977176 1.348578 -0.794982 4.727781 0.1322103
## ACF1
## Training set -0.003004687
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)
# TBATS model
train.tbats <- tbats(train.ts)
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0)
summary(train.tbats)
## Length Class Mode
## lambda 1 -none- numeric
## alpha 1 -none- numeric
## beta 1 -none- numeric
## damping.parameter 1 -none- numeric
## gamma.values 0 -none- NULL
## ar.coefficients 1 -none- numeric
## ma.coefficients 1 -none- numeric
## likelihood 1 -none- numeric
## optim.return.code 1 -none- numeric
## variance 1 -none- numeric
## AIC 1 -none- numeric
## parameters 2 -none- list
## seed.states 4 -none- numeric
## fitted.values 254 ts numeric
## errors 254 ts numeric
## x 1016 -none- numeric
## seasonal.periods 0 -none- NULL
## y 254 ts numeric
## call 2 -none- call
## series 1 -none- character
## method 1 -none- character
# Plot model forecasts
plot(P_E.window.ts, xlab = 'Time', ylab = 'PE Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2022.2), ylim = c(10,45))
lines(valid.ts)
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple')
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',
inset = 0.05,
legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
cex = 0.7,
lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -6.1925 6.401037 6.1925 -23.31453 23.31453 0.5498945 6.339083
accuracy(snaive.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -5.3625 8.124154 7.509167 -21.01205 28.11372 0.7089354 7.890078
accuracy(ma.trailing.pred, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -5.3625 5.60202 5.3625 -20.23537 20.23537 0.5498945 5.571346
accuracy(train.lm.trend.season.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 1.991849 2.480185 1.991849 7.162022 7.162022 0.342672 1.974617
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 2.832528 3.72647 3.209136 10.79542 12.12842 0.5328737 3.788331
accuracy(hwin.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -4.806277 4.966847 4.806277 -18.09073 18.09073 0.4916211 4.905126
accuracy(train.arima.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 2.480806 2.825181 2.641931 9.352676 9.880953 0.2414805 2.822854
accuracy(train.tbats.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.5196247 1.584093 1.193899 2.014651 4.433675 0.7440661 1.557351
# Forecast using our best model
P_E.tbats <- tbats(P_E.window.ts)
P_E.tbats.pred <- forecast(P_E.tbats, h = 12)
plot(P_E.tbats.pred, include = 36)
lines(P_E.tbats.pred$mean, col = 'blue')

P_E.tbats.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2022 24.27405 22.31664 26.40315 21.345171 27.60481
## Apr 2022 23.79090 20.46717 27.65438 18.900006 29.94744
## May 2022 23.41131 18.94581 28.92933 16.937821 32.35892
## Jun 2022 23.11201 17.63927 30.28272 15.288165 34.93977
## Jul 2022 22.87532 16.50199 31.71014 13.882114 37.69458
## Aug 2022 22.68772 15.50415 33.19966 12.674209 40.61260
## Sep 2022 22.53875 14.62293 34.73963 11.629701 43.68083
## Oct 2022 22.42027 13.83996 36.32008 10.720848 46.88701
## Nov 2022 22.32594 13.14025 37.93290 9.925208 50.22037
## Dec 2022 22.25076 12.51142 39.57156 9.224529 53.67172
## Jan 2023 22.19080 11.94326 41.23092 8.603919 57.23340
## Feb 2023 22.14295 11.42729 42.90694 8.051189 60.89909
# Check residuals
checkresiduals(naive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(P_E.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Price to Sales Anlaysis
#Price to Sales
PS_URL <- read_html('https://www.multpl.com/s-p-500-price-to-sales/table/by-quarter')
allTables <- PS_URL %>% html_table(fill = TRUE, header = TRUE)
PS_Share <- allTables[[1]]
colnames(PS_Share)[2] <- 'Price to Sales'
# Cleaning Text and Date in columns
PS_Share$`Price to Sales` <- gsub(pattern = '\n\nestimate', replacement = '', x = PS_Share$`Price to Sales`)
class(PS_Share$Date)
## [1] "character"
PS_Share$Date <- as.Date(PS_Share$Date, format = '%B %d,%Y')
PS_Share <- PS_Share[nrow(PS_Share):1,]
PS_Share$`Price to Sales` <- as.numeric(PS_Share$`Price to Sales`)
str(PS_Share)
## tibble [86 × 2] (S3: tbl_df/tbl/data.frame)
## $ Date : Date[1:86], format: "2000-12-31" "2001-03-31" ...
## $ Price to Sales: num [1:86] 1.77 1.54 1.64 1.41 1.56 1.6 1.39 1.17 1.3 1.25 ...
head(PS_Share)
## # A tibble: 6 × 2
## Date `Price to Sales`
## <date> <dbl>
## 1 2000-12-31 1.77
## 2 2001-03-31 1.54
## 3 2001-06-30 1.64
## 4 2001-09-30 1.41
## 5 2001-12-31 1.56
## 6 2002-03-31 1.6
summary(PS_Share)
## Date Price to Sales
## Min. :2000-12-31 Min. :0.800
## 1st Qu.:2006-04-22 1st Qu.:1.343
## Median :2011-08-15 Median :1.530
## Mean :2011-08-15 Mean :1.663
## 3rd Qu.:2016-12-08 3rd Qu.:1.867
## Max. :2022-02-11 Max. :3.150
PS.ts <- ts(PS_Share$`Price to Sales`, start = c(2001, 1), end = c(2021, 4), freq = 4)
head(PS.ts)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2001 1.77 1.54 1.64 1.41
## 2002 1.56 1.60
# Acf Plot
Acf(PS.ts, lag.max = 4, main = '', na.action = na.pass)

fit <- Arima(diff(PS_Share$`Price to Sales`, 1), order=c(1,0,0))
fit
## Series: diff(PS_Share$`Price to Sales`, 1)
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## -0.1912 0.0145
## s.e. 0.1102 0.0122
##
## sigma^2 estimated as 0.01841: log likelihood=50.17
## AIC=-94.33 AICc=-94.04 BIC=-87.01
fit2 <- auto.arima(PS.ts)
fit2
## Series: PS.ts
## ARIMA(0,1,0)
##
## sigma^2 estimated as 0.01753: log likelihood=50.04
## AIC=-98.07 AICc=-98.02 BIC=-95.65
Acf(diff(PS_Share$`Price to Sales`), lag.max = 4, main = '')

# Moving average
ma.trailing <- rollmean(PS.ts, k = 4, align = 'right')
ma.centered <- ma(PS.ts, order = 4)
plot(PS.ts, ylim = c(0, 4), ylab = 'PS Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2001,2022.4), main = '')
axis(1, at = seq(2001, 2022.4, 1), labels = format(seq(2001, 2022.4, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,120, c('PE Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')

nValid <- 4
nTrain <- length(PS.ts) - nValid
train.ts <- window(PS.ts, start = c(2001, 1), end = c(2001, nTrain))
valid.ts <- window(PS.ts, start = c(2001, nTrain + 1), end = c(2001, nTrain + nValid))
# Naive model
naive.pred <- naive(train.ts, h = nValid)
# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)
# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 4, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2001, nTrain + 1), end = c(2001, nTrain + nValid), freq = 4)
# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)
# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(1,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)
# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)
# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts
## ARIMA(0,1,0)
##
## sigma^2 estimated as 0.01705: log likelihood=48.73
## AIC=-95.46 AICc=-95.41 BIC=-93.09
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.008772125 0.1297595 0.09377212 0.04772998 6.364785 0.5728844
## ACF1
## Training set -0.1915076
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)
# TBATS model
train.tbats <- tbats(train.ts)
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0)
summary(train.tbats)
## Length Class Mode
## lambda 0 -none- NULL
## alpha 1 -none- numeric
## beta 0 -none- NULL
## damping.parameter 0 -none- NULL
## gamma.values 0 -none- NULL
## ar.coefficients 0 -none- NULL
## ma.coefficients 0 -none- NULL
## likelihood 1 -none- numeric
## optim.return.code 1 -none- numeric
## variance 1 -none- numeric
## AIC 1 -none- numeric
## parameters 2 -none- list
## seed.states 1 -none- numeric
## fitted.values 80 ts numeric
## errors 80 ts numeric
## x 80 -none- numeric
## seasonal.periods 0 -none- NULL
## y 80 ts numeric
## call 2 -none- call
## series 1 -none- character
## method 1 -none- character
# Plot model forecasts
plot(PS.ts, xlab = 'Time', ylab = 'PS Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2022.2), ylim = c(1, 3.5), bty = "l")
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'Blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple')
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',
inset = 0.05,
legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
cex = 0.7,
lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.38 0.3852921 0.38 13.29008 13.29008 6.167906e-16 4.579196
accuracy(snaive.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.64 0.6851277 0.64 22.41084 22.41084 -0.2341137 8.374095
accuracy(ma.trailing.pred, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.64 0.6431563 0.64 22.41744 22.41744 6.167906e-16 7.458677
accuracy(train.lm.trend.season.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.3267123 0.3307319 0.3267123 11.44093 11.44093 -0.2822204 3.841602
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.2919218 0.2967909 0.2919218 10.21278 10.21278 -0.2481899 3.512237
accuracy(hwin.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.4418436 0.4464031 0.4418436 15.46111 15.46111 6.167906e-16 5.263174
accuracy(train.arima.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.38 0.3852921 0.38 13.29008 13.29008 6.167906e-16 4.579196
accuracy(train.tbats.pred$mean, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 0.4531414 0.4575884 0.4531414 15.85772 15.85772 6.167906e-16 5.388213
# Forecast using our best model
P_S.lm.trend.season <- tslm(PS.ts ~ trend + I(trend^2) + season)
P_S.lm.trend.season.pred <- forecast(P_S.lm.trend.season, h = nValid, level = 0)
P_S.Arima <- Arima(P_S.lm.trend.season.pred$residuals, order = c(1,0,0))
P_S.Arima.pred <- forecast(P_S.Arima, h = 4)
plot(P_S.lm.trend.season.pred, include = 12)
lines(valid.ts)

P_S.lm.trend.season.pred$mean + P_S.Arima.pred$mean
## Qtr1 Qtr2 Qtr3 Qtr4
## 2022 2.899738 2.898630 2.950132 2.972535
# Check residuals
checkresiduals(naive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Price to Book Anlaysis
#Book Value per Share
P_B_URL <- read_html('https://www.multpl.com/s-p-500-book-value/table/by-quarter')
allTables <- P_B_URL %>% html_table(fill = TRUE, header = TRUE)
P_B_Ratio <- allTables[[1]]
colnames(P_B_Ratio)[2] <- 'Book Value per Share'
# Cleaning Text and Date in columns
P_B_Ratio$`Book Value per Share` <- gsub(pattern = '\n\nestimate', replacement = '', x = P_B_Ratio$`Book Value per Share`)
class(P_B_Ratio$Date)
## [1] "character"
P_B_Ratio$Date <- as.Date(P_B_Ratio$Date, format = '%B %d,%Y')
P_B_Ratio <- P_B_Ratio[nrow(P_B_Ratio):1,]
P_B_Ratio$`Book Value per Share` <- as.numeric(P_B_Ratio$`Book Value per Share`)
str(P_B_Ratio)
## tibble [88 × 2] (S3: tbl_df/tbl/data.frame)
## $ Date : Date[1:88], format: "1999-12-31" "2000-03-31" ...
## $ Book Value per Share: num [1:88] 291 296 313 320 326 ...
head(P_B_Ratio)
## # A tibble: 6 × 2
## Date `Book Value per Share`
## <date> <dbl>
## 1 1999-12-31 291.
## 2 2000-03-31 296.
## 3 2000-06-30 313.
## 4 2000-09-30 320.
## 5 2000-12-31 326.
## 6 2001-03-31 347.
summary(P_B_Ratio)
## Date Book Value per Share
## Min. :1999-12-31 Min. :290.7
## 1st Qu.:2005-06-07 1st Qu.:434.4
## Median :2010-11-15 Median :573.5
## Mean :2010-11-14 Mean :597.8
## 3rd Qu.:2016-04-22 3rd Qu.:755.9
## Max. :2021-09-30 Max. :983.0
P_B.ts <- ts(P_B_Ratio$`Book Value per Share`, start = c(2000, 1), end = c(2021, 3), freq = 4)
P_B.window.ts <- window(P_B.ts, start = c(2000, 1), end = c(2021, 3))
library(forecast)
# Acf Plot
Acf(P_B.window.ts, lag.max = 4, main = '', na.action = na.pass)

library(zoo)
# Test for random walk
fit2 <- Arima(diff(P_B_Ratio$`Book Value per Share`, 1), order=c(1,0,0))
fit2
## Series: diff(P_B_Ratio$`Book Value per Share`, 1)
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.2123 7.9663
## s.e. 0.1042 1.6720
##
## sigma^2 estimated as 155.4: log likelihood=-341.96
## AIC=689.92 AICc=690.21 BIC=697.32
fit3 <- auto.arima(P_B.window.ts)
fit3
## Series: P_B.window.ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.2819 7.9517
## s.e. 0.1222 1.6969
##
## sigma^2 estimated as 155.1: log likelihood=-337.94
## AIC=681.88 AICc=682.18 BIC=689.25
Acf(diff(P_B_Ratio$`Book Value per Share`), lag.max = 4, main = '')

# Moving average
ma.trailing <- rollmean(P_B.window.ts, k = 4, align = 'right')
ma.centered <- ma(P_B.window.ts, order = 4)
plot(P_B.window.ts, ylim = c(0, 1000), ylab = 'PB Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2000,2022.3), main = '')
axis(1, at = seq(2000, 2022.3, 1), labels = format(seq(2000, 2022.3, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,250, c('PB Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')

# Data partition
nValid <- 4
nTrain <- length(P_B.window.ts) - nValid
train.ts <- window(P_B.window.ts, start = c(2000, 1), end = c(2000, nTrain))
valid.ts <- window(P_B.window.ts, start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid))
# Naive model
naive.pred <- naive(train.ts, h = nValid)
# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)
# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 4, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid), freq = 4)
# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)
# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(2,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)
# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)
# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.2556 7.4973
## s.e. 0.1254 1.7061
##
## sigma^2 estimated as 155.9: log likelihood=-322.39
## AIC=650.79 AICc=651.1 BIC=658.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01513591 12.25872 8.200039 -0.07159864 1.558848 0.219262
## ACF1
## Training set -0.02016012
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)
# TBATS model
train.tbats <- tbats(train.ts)
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0)
summary(train.tbats)
## Length Class Mode
## lambda 0 -none- NULL
## alpha 1 -none- numeric
## beta 1 -none- numeric
## damping.parameter 1 -none- numeric
## gamma.values 0 -none- NULL
## ar.coefficients 0 -none- NULL
## ma.coefficients 0 -none- NULL
## likelihood 1 -none- numeric
## optim.return.code 1 -none- numeric
## variance 1 -none- numeric
## AIC 1 -none- numeric
## parameters 2 -none- list
## seed.states 2 -none- numeric
## fitted.values 83 ts numeric
## errors 83 ts numeric
## x 166 -none- numeric
## seasonal.periods 0 -none- NULL
## y 83 ts numeric
## call 2 -none- call
## series 1 -none- character
## method 1 -none- character
# Plot model forecasts
plot(P_B.window.ts, xlab = 'Time', ylab = 'PB Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2021.5), ylim = c(800, 1000))
lines(valid.ts)
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple')
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',
inset = 0.05,
legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
cex = 0.7,
lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts) # MAPE 3.98
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 37.06 41.58819 37.06 3.908406 3.908406 0.1442764 2.401469
accuracy(snaive.pred$mean, valid.ts)# MAPE 3.980919
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 37.7825 44.34693 37.7825 3.980919 3.980919 0.2706733 2.575777
accuracy(ma.trailing.pred, valid.ts)# MAPE 3.985378
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 37.7825 42.2333 37.7825 3.985378 3.985378 0.1442764 2.435947
accuracy(train.lm.trend.season.pred$mean, valid.ts) # MAPE 0.971184
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -3.071286 9.415854 9.138326 -0.3455418 0.971184 -0.005754907 0.4652116
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts) # MAPE 0.5372006
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 5.161234 8.485842 5.161234 0.5372006 0.5372006 -0.1607624 0.496166
accuracy(hwin.pred$mean, valid.ts) # MAPE 2.033524
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 19.31247 22.57404 19.31247 2.033524 2.033524 0.02753156 1.292138
accuracy(train.arima.pred$mean, valid.ts) # MAPE 1.430108
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 13.64012 17.72267 13.64012 1.430108 1.430108 0.01580931 1.030082
accuracy(train.tbats.pred$mean, valid.ts) # MAPE 1.132376
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 10.82925 15.15895 10.82925 1.132376 1.132376 -0.009468737 0.8841638
# Forecast using our best model
P_B.lm.trend.season <- tslm(P_B.window.ts ~ trend + I(trend^2) + season)
P_B.lm.trend.season.pred <- forecast(P_B.lm.trend.season, h = nValid, level = 0)
P_B.Arima <- Arima(P_B.lm.trend.season.pred$residuals, order = c(2,0,0))
P_B.Arima.pred <- forecast(P_B.Arima, h = 5)
plot(P_B.lm.trend.season.pred, include = 12)
lines(valid.ts)

P_B.lm.trend.season.pred$mean + P_B.Arima.pred$mean
## Qtr1 Qtr2 Qtr3 Qtr4
## 2021 984.2310
## 2022 987.2361 994.3101 1005.2300
# Check residuals
checkresiduals(naive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
